#### installing packages if not installed ####

list.of.packages <- c('dplyr',
                      'estimatr',
                      'tidyverse',
                      'haven',
                      'readxl',
                      'ggthemes',
                      'rdrobust',
                      'gridExtra',
                      'lubridate')
new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
if(length(new.packages)) install.packages(new.packages)


#### purpose: analyzing austin racial profiling data #### 

#### attach packages ####

suppressPackageStartupMessages(
  
  {
    
    library(dplyr)
    library(estimatr)
    library(tidyverse)
    library(haven)
    library(readxl)
    library(ggthemes)
    library(rdrobust)
    library(gridExtra)
    library(lubridate)
    
  }
  
)

#### loading datasets #####

load(file = "data/rpf.RData")
load(file = "data/crimeatx.RData")

#### descriptive statistics #### 

# descriptives: stops 

rpf_ts = rpf %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_ts$blmrv = 
  rpf_ts$date - min(rpf_ts$date[rpf_ts$blm == 1])
rpf_ts = rpf_ts %>% filter(blmrv <= 67)

desc_atxp1 = rpf_ts %>% 
  ggplot() + 
  geom_point(aes(x = date, y = count),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Traffic Stops",
       title = "A. Traffic Stops") + 
  theme_tufte(base_size = 9)

# descriptives: search rates

# descriptives: hit rates (not conditional on search)

load(file = "data/rpf.RData")

rpf_ts_hit = rpf %>% 
  group_by(date) %>% 
  summarize(hit = mean(search_found, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_ts_hit$blmrv = 
  rpf_ts_hit$date - min(rpf_ts_hit$date[rpf_ts_hit$blm == 1])
rpf_ts_hit = rpf_ts_hit %>% filter(blmrv <= 67)

desc_atxp2 = rpf_ts_hit %>% 
  ggplot() + 
  geom_point(aes(x = date, y = hit),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = hit, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Hit Rate",
       title = "B. Hit Rate") + 
  theme_tufte(base_size = 9)

# descriptives: hit rates (conditional on search)

load(file = "data/rpf.RData")

rpf_ts_hit = rpf %>% 
  group_by(date) %>% 
  summarize(hit = mean(search_found, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_ts_hit$blmrv = 
  rpf_ts_hit$date - min(rpf_ts_hit$date[rpf_ts_hit$blm == 1])
rpf_ts_hit = rpf_ts_hit %>% filter(blmrv <= 67)

rpf_ts_hit %>% 
  ggplot() + 
  geom_point(aes(x = date, y = hit),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = hit, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Hit Rate | Search",
       title = "B. Hit Rate | Search") + 
  theme_tufte(base_size = 9)

# descriptives: searche rate 

load(file = "data/rpf.RData")

rpf_ts_hit = rpf %>% 
  group_by(date) %>% 
  summarize(search = mean(search, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_ts_hit$blmrv = 
  rpf_ts_hit$date - min(rpf_ts_hit$date[rpf_ts_hit$blm == 1])
rpf_ts_hit = rpf_ts_hit %>% filter(blmrv <= 67)

rpf_ts_hit %>% 
  ggplot() + 
  geom_point(aes(x = date, y = search),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = search, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Search Rate",
       title = "Search Rate") + 
  theme_tufte(base_size = 9)

load(file = "data/rpf.RData")

rpf_ts_hit = rpf %>% 
  group_by(date) %>% 
  summarize(search = sum(search, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_ts_hit$blmrv = 
  rpf_ts_hit$date - min(rpf_ts_hit$date[rpf_ts_hit$blm == 1])
rpf_ts_hit = rpf_ts_hit %>% filter(blmrv <= 67)

rpf_ts_hit %>% 
  ggplot() + 
  geom_point(aes(x = date, y = search),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = search, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Searches",
       title = "Searches") + 
  theme_tufte(base_size = 9)

# descriptives: arrest rates

load(file = "data/rpf.RData")

rpf_ts_arr = rpf %>% 
  group_by(date) %>% 
  summarize(arrest = mean(arrest, na.rm = TRUE),
            blm = mean(blm, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_ts_arr$blmrv = 
  rpf_ts_arr$date - min(rpf_ts_arr$date[rpf_ts_arr$blm == 1])
rpf_ts_arr = rpf_ts_arr %>% filter(blmrv <= 67)

desc_atxp3 = 
  rpf_ts_arr %>% 
  ggplot() + 
  geom_point(aes(x = date, y = arrest),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = arrest, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Arrest Rate",
       title = "C. Arrest Rate") + 
  theme_tufte(base_size = 9)


# descriptives: black/white stop rate ratio: based on 2020 census 

rpf_rr = rpf %>% 
  group_by(date) %>% 
  summarize(r_blk = sum(r_blk),
            r_lat = sum(r_lat),
            r_wht = sum(r_wht),
            blm = mean(blm)) %>% 
  mutate(r_blk_rate = (r_blk / 67329),
         r_wht_rate = (r_wht / 453034),
         r_lat_rate = (r_lat / 312603)) %>% 
  mutate(rr_bw = r_blk_rate / r_wht_rate,
         rr_lw = r_lat_rate / r_wht_rate) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_rr$blmrv = 
  rpf_rr$date - min(rpf_rr$date[rpf_rr$blm == 1])
rpf_rr = rpf_rr %>% filter(blmrv <= 67)

desc_atxp4 = 
  rpf_rr %>% 
  ggplot() + 
  geom_point(aes(x = date, y = rr_bw),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = rr_bw, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "B/W Stop Rate Ratio",
       title = "D. Black/White Stop Rate Ratio") + 
  theme_tufte(base_size = 9)

# descriptives: black/white stop rate ratio: based on 2020 census 

rpf_rr = rpf %>% 
  group_by(date) %>% 
  summarize(r_blk = sum(r_blk),
            r_lat = sum(r_lat),
            r_wht = sum(r_wht),
            blm = mean(blm)) %>% 
  mutate(r_blk_rate = (r_blk / 67329),
         r_wht_rate = (r_wht / 453034),
         r_lat_rate = (r_lat / 312603)) %>% 
  mutate(rr_bw = r_blk_rate / r_wht_rate,
         rr_lw = r_lat_rate / r_wht_rate) %>% 
  filter(date >= as.Date("2020-03-23"))

rpf_rr$blmrv = 
  rpf_rr$date - min(rpf_rr$date[rpf_rr$blm == 1])
rpf_rr = rpf_rr %>% filter(blmrv <= 67)

desc_atxp5 = 
  rpf_rr %>% 
  ggplot() + 
  geom_point(aes(x = date, y = rr_lw),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = rr_lw, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "L/W Stop Rate Ratio",
       title = "E. Latino/White Stop Rate Ratio") + 
  theme_tufte(base_size = 9)

# descriptives: crime (against person)

crime_atx_ts1 = crime_atx %>% 
  filter(crime_category == "person") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(blm = mean(blm, na.rm = TRUE),
            count = sum(count, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

crime_atx_ts1$blmrv = 
  crime_atx_ts1$date - min(crime_atx_ts1$date[crime_atx_ts1$blm == 1])
crime_atx_ts1 = crime_atx_ts1 %>% filter(blmrv <= 67)

desc_atxp6 = 
  crime_atx_ts1 %>% 
  ggplot() + 
  geom_point(aes(x = date, y = count),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Crime (Against Person)",
       title = "D. Crime (Against Person)") + 
  theme_tufte(base_size = 9)

# descriptives: crime (against property)

crime_atx_ts2 = crime_atx %>% 
  filter(crime_category == "property") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(blm = mean(blm, na.rm = TRUE),
            count = sum(count, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

crime_atx_ts2$blmrv = 
  crime_atx_ts2$date - min(crime_atx_ts2$date[crime_atx_ts2$blm == 1])
crime_atx_ts2 = crime_atx_ts2 %>% filter(blmrv <= 67)

desc_atxp7 = 
  crime_atx_ts2 %>% 
  ggplot() + 
  geom_point(aes(x = date, y = count),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Crime (Against Property)",
       title = "E. Crime (Against Property)") + 
  theme_tufte(base_size = 9)

# descriptives: crime (against society)

crime_atx_ts3 = crime_atx %>% 
  filter(crime_category == "society") %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(blm = mean(blm, na.rm = TRUE),
            count = sum(count, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

crime_atx_ts3$blmrv = 
  crime_atx_ts3$date - min(crime_atx_ts3$date[crime_atx_ts3$blm == 1])
crime_atx_ts3 = crime_atx_ts3 %>% filter(blmrv <= 67)

desc_atxp8 = 
  crime_atx_ts3 %>% 
  ggplot() + 
  geom_point(aes(x = date, y = count),
             size = .3,
             alpha = .3) + 
  geom_smooth(aes(x = date, y = count, group = blm),
              se = FALSE,
              col = 'black',
              size = .3) + 
  geom_vline(xintercept = as.Date("2020-05-29"),
             linetype = 2,
             size = .3) + 
  labs(x = "Date", y = "Crime (Against Society)",
       title = "F. Crime (Against Society)") + 
  theme_tufte(base_size = 9)


grob_atx_desc = arrangeGrob(desc_atxp1, desc_atxp2, desc_atxp3,
            desc_atxp4, desc_atxp5, desc_atxp6,
            grid::nullGrob(), desc_atxp7, desc_atxp8, grid::nullGrob(), 
            layout_matrix = matrix(c(1, 1, 2, 2, 3, 3,
                                     4, 4, 5, 5, 6, 6,
                                     7, 8, 8, 9, 9, 10),
                                   byrow = TRUE, ncol = 6, nrow = 3))

ggsave(plot = grob_atx_desc, filename = "pics/atxdesc.png", width = 8, height = 6)

#### rdit: cct #### 

# polynomial: 0, 1, 2, 3
# kernel: 0, 1, 2, 3

# traffic stop dataset

stops_cct = rpf %>% 
  mutate(count = 1) %>% 
  group_by(date) %>% 
  summarize(count = sum(count, na.rm = TRUE),
            blm = mean(blm)) %>% 
  mutate(count_stops = count)

stops_cct$blmrv = 
  stops_cct$date - min(stops_cct$date[stops_cct$blm == 1])

# hit rate 

hr_cct = rpf %>% 
  group_by(date) %>% 
  summarize(search_found = mean(search_found),
            blm = mean(blm)) 

hr_cct$blmrv = 
  hr_cct$date - min(hr_cct$date[hr_cct$blm == 1])

# arrest rate

ar_cct = rpf %>% 
  group_by(date) %>% 
  summarize(arrest = mean(arrest),
            blm = mean(blm)) 

ar_cct$blmrv = 
  ar_cct$date - min(ar_cct$date[ar_cct$blm == 1])

# b/w, l/w stop rate ratio

rpf_rr = rpf %>% 
  group_by(date) %>% 
  summarize(r_blk = sum(r_blk),
            r_lat = sum(r_lat),
            r_wht = sum(r_wht),
            blm = mean(blm)) %>% 
  mutate(r_blk_rate = (r_blk / 67329),
         r_wht_rate = (r_wht / 453034),
         r_lat_rate = (r_lat / 312603)) %>% 
  mutate(rr_bw = r_blk_rate / r_wht_rate,
         rr_lw = r_lat_rate / r_wht_rate) 

rpf_rr$blmrv = 
  rpf_rr$date - min(rpf_rr$date[rpf_rr$blm == 1])

# against person, property, society

crime_atx_ts = crime_atx %>% 
  mutate(crime_person = ifelse(crime_category == "person", 1, 0),
         crime_property = ifelse(crime_category == "property", 1, 0),
         crime_society = ifelse(crime_category == "society", 1, 0)) %>% 
  group_by(date) %>% 
  summarize(blm = mean(blm, na.rm = TRUE),
            crime_person = sum(crime_person, na.rm = TRUE),
            crime_property = sum(crime_property, na.rm = TRUE),
            crime_society = sum(crime_society, na.rm = TRUE)) %>% 
  filter(date >= as.Date("2020-03-23"))

crime_atx_ts$blmrv = 
  crime_atx_ts$date - min(crime_atx_ts$date[crime_atx_ts$blm == 1])

# loop 

polys = c(0, 1, 2, 3)
kernz = c("tri", "uni", "epa")
datasets = list(stops_cct %>% as.data.frame,
                hr_cct %>% as.data.frame,
                ar_cct %>% as.data.frame,
                rpf_rr %>% as.data.frame,
                rpf_rr %>% as.data.frame, 
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame)
outcomes = 
  c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
    'crime_person', "crime_property", "crime_society")

dataset_list = as.list(rep(NA, length(datasets)))

for (i in 1:length(datasets)) {
  
  print(paste0("I iteration ", i))
  polys_list = as.list(rep(NA, length(polys)))
  
  for (j in 1:length(polys)) {
    
    print(paste0("J iteration ", j))
    
    kerns_matrix = matrix(NA, ncol = 6, nrow = length(kernz))
    
    for (k in 1:length(kernz)) {
      
      print(paste0("K iteration ", k))
      out = rdrobust(y = as.numeric(datasets[[i]][, outcomes[i]]),
                     x = datasets[[i]]$blmrv, 
                     p = polys[j],
                     kernel = kernz[k])
      
      kerns_matrix[k, 1] = out$coef[3]
      kerns_matrix[k, 2] = out$se[3]
      kerns_matrix[k, 3] = out$pv[3]
      kerns_matrix[k, 4] = outcomes[i]
      kerns_matrix[k, 5] = polys[j]
      kerns_matrix[k, 6] = kernz[k]
      
    }
    
    polys_list[[j]] = 
      kerns_matrix %>% 
      as.data.frame %>% 
      `colnames<-` (c("est", "se", "pv", "outcomes", "poly", "kern")) %>% 
      mutate(est = as.numeric(as.character(est)),
             se = as.numeric(as.character(se)),
             pv = as.numeric(as.character(pv)),
             outcomes = as.character(outcomes),
             poly = as.numeric(as.character(poly)),
             kern = as.character(kern))
    
  }
  
  dataset_list[[i]] = polys_list
  
}

cct_estimates = 
  dataset_list %>% 
  unlist(recursive = FALSE) %>% 
  do.call(rbind.data.frame, .) %>% 
  mutate(kernpoly = paste0(kern, ", ", poly)) %>% 
  mutate(kernpoly = factor(kernpoly, 
                           levels = c("tri, 0", "tri, 1", "tri, 2", "tri, 3",
                                      "uni, 0", "uni, 1", "uni, 2", "uni, 3",
                                      "epa, 0", "epa, 1", "epa, 2", "epa, 3"),
                           labels = c("Tri.,\n0", "Tri.,\n1", "Tri.,\n2", "Tri.,\n3",
                                      "Uni.,\n0", "Uni.,\n1", "Uni.,\n2", "Uni.,\n3",
                                      "Epa.,\n0", "Epa.,\n1", "Epa.,\n2", "Epa.,\n3"))) %>% 
  mutate(outcomes = factor(outcomes,
                           levels =   c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
                                        'crime_person', "crime_property", "crime_society"),
                           labels = c("A. Traffic Stops", "B. Hit Rate", "C. Arrest Rate",
                                      "D. B/W Rate Ratio", "E. L/W Rate Ratio", 
                                      "E. Crime (Against Person)", "F. Crime (Property)", "G. Crime (Against Society)"))) 


cct_estimates = 
  cct_estimates %>% filter(!grepl(x = outcomes, "L/W Rate Ratio"))

cct_estimates %>% 
  ggplot() + 
  geom_point(aes(x = kernpoly, y = est)) +
  geom_errorbar(aes(x = kernpoly,
                    ymin = est - 1.96 * se,
                    ymax = est + 1.96 * se),
                size = .2,
                width = 0) + 
  geom_errorbar(aes(x = kernpoly,
                    ymin = est - 1.645 * se,
                    ymax = est + 1.645 * se),
                size = .4,
                width = 0) + 
  geom_hline(yintercept = 0) + 
  facet_wrap(~outcomes, scales = "free", ncol = 4) + 
  labs(x = "Kernel, Polynomial",
       y = "RDiT Coefficient (BLM)") + 
  theme_tufte(base_size = 10) + 
  theme(axis.text.x = element_text(size = 3.5))

ggsave(plot = last_plot(), filename = "pics/cctresatx.png", width = 8, height = 3.5)

#### rdit: 10-65 day bandwidth ####

# loop 

polys = c(0, 1, 2, 3)
kernz = c("tri", "uni", "epa")
datasets = list(stops_cct %>% as.data.frame,
                hr_cct %>% as.data.frame,
                ar_cct %>% as.data.frame,
                rpf_rr %>% as.data.frame,
                rpf_rr %>% as.data.frame, 
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame)
outcomes = 
  c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
    'crime_person', "crime_property", "crime_society")

dataset_list = as.list(rep(NA, length(datasets)))

for (i in 1:length(datasets)) {
  
  print(paste0("I iteration ", i))
  polys_list = as.list(rep(NA, length(polys)))
  
  for (j in 1:length(polys)) {
    
    print(paste0("J iteration ", j))
    
    # kerns_matrix = matrix(NA, ncol = 6, nrow = length(kernz))
    kerns_list = as.list(rep(NA, length(kernz)))
    
    for (k in 1:length(kernz)) {
      
      print(paste0("K iteration ", k))
      
      bw_matrix = matrix(NA, ncol = 7, nrow = length(10:100))
      
      for (l in 1:length(10:100)) {
        
        print(paste0("L iteration ", l))
        out = rdrobust(y = as.numeric(datasets[[i]][, outcomes[i]]),
                       x = datasets[[i]]$blmrv, 
                       p = polys[j],
                       kernel = kernz[k],
                       h = (10:100)[l])
        
        bw_matrix[l, 1] = out$coef[3]
        bw_matrix[l, 2] = out$se[3]
        bw_matrix[l, 3] = out$pv[3]
        bw_matrix[l, 4] = outcomes[i]
        bw_matrix[l, 5] = polys[j]
        bw_matrix[l, 6] = kernz[k]
        bw_matrix[l, 7] = (10:100)[l]
        
      }
      
      kerns_list[[k]] = bw_matrix %>% 
        as.data.frame %>% 
        `colnames<-` (c("est", "se", "pv", "outcomes", "poly", "kern", "bw")) %>% 
        mutate(est = as.numeric(as.character(est)),
               se = as.numeric(as.character(se)),
               pv = as.numeric(as.character(pv)),
               outcomes = as.character(outcomes),
               poly = as.numeric(as.character(poly)),
               kern = as.character(kern),
               bw = as.numeric(as.character(bw)))
      
    }
    
    polys_list[[j]] = kerns_list
    
  }
  
  dataset_list[[i]] = polys_list
  
}

cct_estimates = 
  dataset_list %>% 
  unlist(recursive = FALSE) %>%
  unlist(recursive = FALSE) %>% 
  do.call(rbind.data.frame, .) %>% 
  mutate(kernpoly = paste0(kern, ", ", poly)) %>% 
  mutate(kernpoly = factor(kernpoly, 
                           levels = c("tri, 0", "tri, 1", "tri, 2", "tri, 3",
                                      "uni, 0", "uni, 1", "uni, 2", "uni, 3",
                                      "epa, 0", "epa, 1", "epa, 2", "epa, 3"),
                           labels = c("Tri., 0", "Tri., 1", "Tri., 2", "Tri., 3",
                                      "Uni., 0", "Uni., 1", "Uni., 2", "Uni., 3",
                                      "Epa., 0", "Epa., 1", "Epa., 2", "Epa., 3"))) %>% 
  mutate(outcomes = factor(outcomes,
                           levels =   c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
                                        'crime_person', "crime_property", "crime_society"),
                           labels = c("A. Traffic Stops", "B. Hit Rate", "C. Arrest Rate",
                                      "D. B/W Rate Ratio", "E. L/W Rate Ratio", 
                                      "G. Crime (Against Person)", "H. Crime (Property)", "I. Crime (Against Society)"))) 

the_outs = c("A. Traffic Stops", "B. Hit Rate", "C. Arrest Rate",
             "D. B/W Rate Ratio", "E. L/W Rate Ratio", 
             "G. Crime (Against Person)", "H. Crime (Property)", "I. Crime (Against Society)")

the_outs_labs = c("Traffic Stops", "Hit Rate", "Arrest Rate",
                  "B/W Rate Ratio", "L/W Rate Ratio", 
                  "Crime (Against Person)", "Crime (Property)", "Crime (Against Society)")

the_outs_plot_list = as.list(rep(NA, length(the_outs_labs)))

for (i in 1:length(the_outs_labs)) {
  
  print(paste0("Iteration ", i))
  
  the_outs_plot_list[[i]] = 
    cct_estimates %>%
    filter(outcomes == the_outs[i]) %>% 
    ggplot() +
    geom_point(aes(x = bw, y = est),
               size = .1,
               alpha = .4) +
    geom_errorbar(aes(x = bw, 
                      ymin = est - 1.96 * se,
                      ymax = est + 1.96 * se),
                  width = 0,
                  size = .1) +
    geom_errorbar(aes(x = bw, 
                      ymin = est - 1.645 * se,
                      ymax = est + 1.645 * se),
                  width = 0,
                  size = .2) + 
    geom_hline(yintercept = 0) + 
    facet_wrap(~ kernpoly, scales = "free") + 
    labs(x = "Bandwidth", y = "RDiT Coefficient\n(BLM Protest)",
         title = the_outs_labs[i]) + 
    theme_tufte(base_size = 9)
  
}

ggsave(plot = the_outs_plot_list[[1]], filename = "pics/c2bwpatx1.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[2]], filename = "pics/c2bwpatx2.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[3]], filename = "pics/c2bwpatx3.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[4]], filename = "pics/c2bwpatx4.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[5]], filename = "pics/c2bwpatx5.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[6]], filename = "pics/c2bwpatx6.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[7]], filename = "pics/c2bwpatx7.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[8]], filename = "pics/c2bwpatx8.png", width = 8, height = 6.25)


#### rdit: shaving 100 days after the protest #### 

# loop 

polys = c(0, 1, 2, 3)
kernz = c("tri", "uni", "epa")
datasets = list(stops_cct %>% as.data.frame,
                hr_cct %>% as.data.frame,
                ar_cct %>% as.data.frame,
                rpf_rr %>% as.data.frame,
                rpf_rr %>% as.data.frame, 
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame)
outcomes = 
  c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
    'crime_person', "crime_property", "crime_society")

dataset_list = as.list(rep(NA, length(datasets)))

for (i in 1:length(datasets)) {
  
  print(paste0("I iteration ", i))
  polys_list = as.list(rep(NA, length(polys)))
  
  for (j in 1:length(polys)) {
    
    print(paste0("J iteration ", j))
    
    # kerns_matrix = matrix(NA, ncol = 6, nrow = length(kernz))
    kerns_list = as.list(rep(NA, length(kernz)))
    
    for (k in 1:length(kernz)) {
      
      print(paste0("K iteration ", k))
      
      bw_matrix = matrix(NA, ncol = 7, nrow = length(1:100))
      
      for (l in 1:length(1:100)) {
        
        print(paste0("L iteration ", l))
        dftouse = 
          datasets[[i]] %>% 
          filter(blmrv <= -1 | blmrv >= l)
        
        dftouse$blmrv = 
          ifelse(dftouse$blmrv >= 0, dftouse$blmrv - l, dftouse$blmrv)
        
        out = rdrobust(y = as.numeric(dftouse[, outcomes[i]]),
                       x = dftouse$blmrv, 
                       p = polys[j],
                       kernel = kernz[k])
        
        bw_matrix[l, 1] = out$coef[3]
        bw_matrix[l, 2] = out$se[3]
        bw_matrix[l, 3] = out$pv[3]
        bw_matrix[l, 4] = outcomes[i]
        bw_matrix[l, 5] = polys[j]
        bw_matrix[l, 6] = kernz[k]
        bw_matrix[l, 7] = (1:100)[l]
        
      }
      
      kerns_list[[k]] = bw_matrix %>% 
        as.data.frame %>% 
        `colnames<-` (c("est", "se", "pv", "outcomes", "poly", "kern", "days_cut")) %>% 
        mutate(est = as.numeric(as.character(est)),
               se = as.numeric(as.character(se)),
               pv = as.numeric(as.character(pv)),
               outcomes = as.character(outcomes),
               poly = as.numeric(as.character(poly)),
               kern = as.character(kern),
               days_cut = as.numeric(as.character(days_cut)))
      
    }
    
    polys_list[[j]] = kerns_list
    
  }
  
  dataset_list[[i]] = polys_list
  
}

cct_estimates = 
  dataset_list %>% 
  unlist(recursive = FALSE) %>%
  unlist(recursive = FALSE) %>% 
  do.call(rbind.data.frame, .) %>% 
  mutate(kernpoly = paste0(kern, ", ", poly)) %>% 
  mutate(kernpoly = factor(kernpoly, 
                           levels = c("tri, 0", "tri, 1", "tri, 2", "tri, 3",
                                      "uni, 0", "uni, 1", "uni, 2", "uni, 3",
                                      "epa, 0", "epa, 1", "epa, 2", "epa, 3"),
                           labels = c("Tri., 0", "Tri., 1", "Tri., 2", "Tri., 3",
                                      "Uni., 0", "Uni., 1", "Uni., 2", "Uni., 3",
                                      "Epa., 0", "Epa., 1", "Epa., 2", "Epa., 3"))) %>% 
  mutate(outcomes = factor(outcomes,
                           levels =   c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
                                        'crime_person', "crime_property", "crime_society"),
                           labels = c("A. Traffic Stops", "B. Hit Rate", "C. Arrest Rate",
                                      "D. B/W Rate Ratio", "E. L/W Rate Ratio", 
                                      "G. Crime (Against Person)", "H. Crime (Property)", "I. Crime (Against Society)"))) 

the_outs = c("A. Traffic Stops", "B. Hit Rate", "C. Arrest Rate",
             "D. B/W Rate Ratio", "E. L/W Rate Ratio", 
             "G. Crime (Against Person)", "H. Crime (Property)", "I. Crime (Against Society)")

the_outs_labs = c("Traffic Stops", "Hit Rate", "Arrest Rate",
                  "B/W Rate Ratio", "L/W Rate Ratio", 
                  "Crime (Against Person)", "Crime (Property)", "Crime (Against Society)")

the_outs_plot_list = as.list(rep(NA, length(the_outs_labs)))

for (i in 1:length(the_outs_labs)) {
  
  print(paste0("Iteration ", i))
  
  the_outs_plot_list[[i]] = 
    cct_estimates %>%
    filter(outcomes == the_outs[i]) %>% 
    ggplot() +
    geom_point(aes(x = days_cut, y = est),
               size = .1,
               alpha = .4) +
    geom_errorbar(aes(x = days_cut, 
                      ymin = est - 1.96 * se,
                      ymax = est + 1.96 * se),
                  width = 0,
                  size = .1) +
    geom_errorbar(aes(x = days_cut, 
                      ymin = est - 1.645 * se,
                      ymax = est + 1.645 * se),
                  width = 0,
                  size = .2) + 
    geom_hline(yintercept = 0) + 
    facet_wrap(~ kernpoly, scales = "free") + 
    labs(x = "Days Cut", y = "RDiT Coefficient\n(BLM Protest)",
         title = the_outs_labs[i]) + 
    theme_tufte(base_size = 9)
  
}

ggsave(plot = the_outs_plot_list[[1]], filename = "pics/dcpatx1.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[2]], filename = "pics/dcpatx2.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[3]], filename = "pics/dcpatx3.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[4]], filename = "pics/dcpatx4.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[5]], filename = "pics/dcpatx5.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[6]], filename = "pics/dcpatx6.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[7]], filename = "pics/dcpatx7.png", width = 8, height = 6.25)
ggsave(plot = the_outs_plot_list[[8]], filename = "pics/dcpatx8.png", width = 8, height = 6.25)

#### rdit: temporal placebo test #### 

# setting up loop

# reproduce crime 

crime_atx_ts = crime_atx %>% 
  mutate(crime_person = ifelse(crime_category == "person", 1, 0),
         crime_property = ifelse(crime_category == "property", 1, 0),
         crime_society = ifelse(crime_category == "society", 1, 0)) %>% 
  group_by(date) %>% 
  summarize(blm = mean(blm, na.rm = TRUE),
            crime_person = sum(crime_person, na.rm = TRUE),
            crime_property = sum(crime_property, na.rm = TRUE),
            crime_society = sum(crime_society, na.rm = TRUE)) 

crime_atx_ts$blmrv = 
  crime_atx_ts$date - min(crime_atx_ts$date[crime_atx_ts$blm == 1])


polys = c(0, 1, 2, 3)
kernz = c("uni")
datasets = list(stops_cct %>% as.data.frame,
                hr_cct %>% as.data.frame,
                ar_cct %>% as.data.frame,
                rpf_rr %>% as.data.frame,
                rpf_rr %>% as.data.frame, 
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame)
outcomes = 
  c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
    'crime_person', "crime_property", "crime_society")

dataset_list = as.list(rep(NA, length(datasets)))

the_lag_max = as.numeric(as.Date("2020-05-29") - min(datasets[[1]]$date)) - 30

# loop 

for (i in 1:length(datasets)) {
  
  print(paste0("I iteration ", i))
  polys_list = as.list(rep(NA, length(polys)))
  
  for (j in 1:length(polys)) {
    
    print(paste0("J iteration ", j))
    
    kerns_list = as.list(rep(NA, length(kernz)))
    
    for (k in 1:length(kernz)) {
      
      print(paste0("K iteration ", j))
      the_lag_max = (as.numeric(as.Date("2020-05-29") - 
                                  min(datasets[[i]]$date))) - 30
      the_lag_max2 = as.Date(min(datasets[[i]]$date) + 30)
      lagmat = matrix(NA, ncol = 9, nrow = the_lag_max) %>% 
        as.data.frame %>% 
        `colnames<-` (c("est", "se", "pv", "outcomes", "poly",
                        "kern", "lag", "lagmax", "lagmaxdate")) 
      
      for (z in 30:the_lag_max) {
        
        print(paste0("Z iteration ", z))
        
        datasets[[i]]$blmrv_fake = datasets[[i]]$date - 
          datasets[[i]]$date[lead(datasets[[i]]$blmrv, z) == 0 & 
                               !is.na(lead(datasets[[i]]$blmrv, z))]
        
        out = rdrobust(y = as.numeric(datasets[[i]][, outcomes[i]]),
                       x = datasets[[i]]$blmrv_fake, 
                       p = polys[j],
                       kernel = kernz[k])
        
        lagmat[z, 1] = out$coef[3]
        lagmat[z, 2] = out$se[3]
        lagmat[z, 3] = out$pv[3]
        lagmat[z, 4] = outcomes[i]
        lagmat[z, 5] = polys[j]
        lagmat[z, 6] = kernz[k]
        lagmat[z, 7] = z
        lagmat[z, 8] = the_lag_max
        lagmat$lagmaxdate[z] = as.Date(the_lag_max2)
        
      }
      
      kerns_list[[k]] = lagmat %>% filter(!is.na(est)) %>% 
        mutate(lagmaxdate = as.Date(lagmaxdate, origin = "1970-01-01"))
      
    }
    
    polys_list[[j]] = 
      kerns_list 
    
  }
  
  dataset_list[[i]] = polys_list
  
}

plc_test_out_aus = dataset_list
save(x = plc_test_out_aus, file = "data/plctest_aus.RData")
load('data/plctest_aus.RData')

# re-running loop without placebos to serve as benchmarks

# loop 

polys = c(0, 1, 2, 3)
kernz = c("tri", "uni", "epa")
datasets = list(stops_cct %>% as.data.frame,
                hr_cct %>% as.data.frame,
                ar_cct %>% as.data.frame,
                rpf_rr %>% as.data.frame,
                rpf_rr %>% as.data.frame, 
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame)
outcomes = 
  c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
    'crime_person', "crime_property", "crime_society")

dataset_list = as.list(rep(NA, length(datasets)))

for (i in 1:length(datasets)) {
  
  print(paste0("I iteration ", i))
  polys_list = as.list(rep(NA, length(polys)))
  
  for (j in 1:length(polys)) {
    
    print(paste0("J iteration ", j))
    
    kerns_matrix = matrix(NA, ncol = 6, nrow = length(kernz))
    
    for (k in 1:length(kernz)) {
      
      print(paste0("K iteration ", k))
      out = rdrobust(y = as.numeric(datasets[[i]][, outcomes[i]]),
                     x = datasets[[i]]$blmrv, 
                     p = polys[j],
                     kernel = kernz[k])
      
      kerns_matrix[k, 1] = out$coef[3]
      kerns_matrix[k, 2] = out$se[3]
      kerns_matrix[k, 3] = out$pv[3]
      kerns_matrix[k, 4] = outcomes[i]
      kerns_matrix[k, 5] = polys[j]
      kerns_matrix[k, 6] = kernz[k]
      
    }
    
    polys_list[[j]] = 
      kerns_matrix %>% 
      as.data.frame %>% 
      `colnames<-` (c("est", "se", "pv", "outcomes", "poly", "kern")) %>% 
      mutate(est = as.numeric(as.character(est)),
             se = as.numeric(as.character(se)),
             pv = as.numeric(as.character(pv)),
             outcomes = as.character(outcomes),
             poly = as.numeric(as.character(poly)),
             kern = as.character(kern))
    
  }
  
  dataset_list[[i]] = polys_list
  
}

cct_estimates = 
  dataset_list %>% 
  unlist(recursive = FALSE) %>% 
  do.call(rbind.data.frame, .) %>% 
  mutate(kernpoly = paste0(kern, ", ", poly)) %>% 
  mutate(kernpoly = factor(kernpoly, 
                           levels = c("tri, 0", "tri, 1", "tri, 2", "tri, 3",
                                      "uni, 0", "uni, 1", "uni, 2", "uni, 3",
                                      "epa, 0", "epa, 1", "epa, 2", "epa, 3"),
                           labels = c("Tri.,\n0", "Tri.,\n1", "Tri.,\n2", "Tri.,\n3",
                                      "Uni.,\n0", "Uni.,\n1", "Uni.,\n2", "Uni.,\n3",
                                      "Epa.,\n0", "Epa.,\n1", "Epa.,\n2", "Epa.,\n3"))) %>% 
  mutate(outcomes = factor(outcomes,
                           levels =   c("count_stops", "search_found", "arrest", 'rr_bw', 'rr_lw',
                                        'crime_person', "crime_property", "crime_society"),
                           labels = c("A. Traffic Stops", "B. Hit Rate", "C. Arrest Rate",
                                      "D. B/W Rate Ratio", "E. L/W Rate Ratio", 
                                      "G. Crime (Against Person)", "H. Crime (Property)", "I. Crime (Against Society)"))) %>% 
  filter(grepl(x = kernpoly, "^Uni."))

# placebo test plots 

plc_test_out_aus_df = bind_rows(
  plc_test_out_aus[[1]] %>% bind_rows(),
  plc_test_out_aus[[2]] %>% bind_rows(),
  plc_test_out_aus[[3]] %>% bind_rows(),
  plc_test_out_aus[[4]] %>% bind_rows(),
  plc_test_out_aus[[5]] %>% bind_rows(),
  plc_test_out_aus[[6]] %>% bind_rows(),
  plc_test_out_aus[[7]] %>% bind_rows(),
  plc_test_out_aus[[8]] %>% bind_rows()
) %>% 
  mutate(outcomes = factor(outcomes,
                           levels = c("count_stops", "search_found", "arrest", 
                                      "rr_bw", "rr_lw", "crime_person",
                                      "crime_property", "crime_society"),
                           labels = c("Traffic Stops", "Hit Rate", "Arrest Rate",
                                      "B/W Rate Ratio (Traffic)", "L/W Rate Ratio (Traffic)", 
                                      "Violent Crime", "Property Crime", "Society Crime")))

plc_test_out_aus_df$outcomes_poly = 
  factor(paste0(plc_test_out_aus_df$outcomes, ", ", plc_test_out_aus_df$poly),
         levels = unique(paste0(plc_test_out_aus_df$outcomes, ", ",
                                plc_test_out_aus_df$poly)))

plc_test_out_aus_df$outcomes_poly = 
  factor(plc_test_out_aus_df$outcomes_poly, 
         levels = unique(plc_test_out_aus_df$outcomes_poly))

cct_estimates$outcomes_poly = unique(plc_test_out_aus_df$outcomes_poly)

# before plotting, figuring out the p-value

outcomes_poly_plc = 
  rep(NA, length(unique(plc_test_out_aus_df$outcomes_poly)))

for (i in 1:length(unique(plc_test_out_aus_df$outcomes_poly))) {
  
  print(paste0("Iteration ", i))
  
  outcomes_poly_plc[i] = 
    mean(abs(cct_estimates$est[cct_estimates$outcomes_poly == 
                                  unique(cct_estimates$outcomes_poly)[i]]) >= 
           abs(plc_test_out_aus_df$est[plc_test_out_aus_df$outcomes_poly == 
                                          unique(plc_test_out_aus_df$outcomes_poly)[i]]))
  
}

plc_test_out_aus_df$outcomes_poly = 
  factor(plc_test_out_aus_df$outcomes_poly,
         levels = unique(plc_test_out_aus_df$outcomes_poly),
         labels = paste0(unique(plc_test_out_aus_df$outcomes_poly), ", ",
                         round(outcomes_poly_plc, 2)))

cct_estimates$outcomes_poly = 
  factor(cct_estimates$outcomes_poly,
         levels = unique(cct_estimates$outcomes_poly),
         labels = paste0(unique(cct_estimates$outcomes_poly), ", ",
                         round(outcomes_poly_plc, 2)))

plc_test_out_aus_df[grepl(x = plc_test_out_aus_df$outcomes, "Crime"), ]
plc_test_out_aus_df %>% 
  # filter(!grepl(x = outcomes, "Crime")) %>% 
  filter(!grepl(x = outcomes, "L/W")) %>% 
  ggplot() + 
  geom_histogram(aes(x = est)) + 
  facet_wrap(~ outcomes_poly, ncol = 4, scales = "free") +
  geom_vline(data = cct_estimates %>% filter(!grepl(x = outcomes, "Crime")) %>% filter(!grepl(x = outcomes, "L/W")),
             mapping = aes(xintercept = est)) +
  labs(x = "Placebo Coefficients", y = "Count") +  
  theme_tufte(base_size = 8)

ggsave(plot = last_plot(),
       filename = "pics/temp_plc_test_aus.png",
       width = 8, height = 6)

#### saving datasets for all city analysis #### 

datasets_atx = list(stops_cct %>% as.data.frame,
                hr_cct %>% as.data.frame,
                ar_cct %>% as.data.frame,
                rpf_rr %>% as.data.frame,
                rpf_rr %>% as.data.frame, 
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame,
                crime_atx_ts %>% as.data.frame)

save(x = datasets_atx, file = 'data/datasets_atx.Rdata')
